# Looking into rat sightings vs. waste data
# Monthly DSNY tonnage totals per borough for 2016 through 2023.
#
# The source `month` column is split on " / " into character `year` and
# `month` columns (presumably values look like "2016 / 01" -- TODO confirm
# against the raw file). Refuse, paper and MGP tons are then summed for every
# year/month/borough combination, ignoring missing values.
waste <-
  read.csv("DSNY_Monthly_Tonnage_Data_20231202.csv") |>
  janitor::clean_names() |>
  separate(month, into = c("year", "month"), sep = " / ") |>
  # `year` is character after separate(); compare it as a number rather than
  # relying on string ordering. `year` itself stays character because it is
  # used as a join key later in the script.
  filter(as.integer(year) >= 2016, as.integer(year) < 2024) |>
  group_by(year, month, borough) |>
  summarize(
    total_refuse = sum(refusetonscollected, na.rm = TRUE),
    total_paper = sum(papertonscollected, na.rm = TRUE),
    total_mgp = sum(mgptonscollected, na.rm = TRUE),
    .groups = "drop"
  ) |>
  # Lower-case and trim text columns only, so the tonnage totals stay numeric
  # instead of being turned into character strings.
  mutate(across(where(is.character), \(x) trimws(tolower(x))))
# Long-format version of `waste`: one row per year/month/borough/waste type,
# with the tonnage in `tons` and the waste type ("refuse", "paper", "mgp")
# in `type`.
waste_2 <- waste |>
  pivot_longer(
    total_refuse:total_mgp,
    names_to = "type",
    # Strip the "total_" prefix by name rather than by character position.
    names_prefix = "total_",
    values_to = "tons"
  ) |>
  mutate(tons = as.numeric(tons)) |>
  # `tons` is a measure, not a key: leaving it out of the grouping avoids
  # one-row groups and keeps the column free to be modified downstream.
  group_by(year, month, borough, type)
# Number of rat-sighting reports per year/month/borough.
#
# Keeps reports dated 2016-01-01 through 2023-10-31 whose zip code is above
# 10000 and at most 11697 and whose borough is known. The result has the
# columns `year`, `month`, `borough` and `ratcount`.
sightings <-
  read_csv("NYC_Rat_Sightings.csv") |>
  janitor::clean_names() |>
  # `created_date` is cut at fixed character positions, which assumes the
  # layout "MM/DD/YYYY ..." -- TODO confirm against the raw file. NA entries
  # in `into` discard the separator characters and the trailing time text.
  separate(
    created_date,
    into = c("month", NA, "day", NA, "year", NA, NA),
    sep = c(2, 3, 5, 6, 10, 11)
  ) |>
  # YYYYMMDD as a number, so the date window is a plain numeric comparison.
  mutate(date = as.numeric(paste0(year, month, day))) |>
  filter(
    date >= 20160101,
    date <= 20231031,
    incident_zip > 10000,
    incident_zip <= 11697,
    !borough %in% c("Unspecified", NA)
  ) |>
  # Only the grouping keys need normalising; every other column is dropped
  # by the summary below. `borough` is lower-cased so it lines up with the
  # lower-cased borough names in `waste`.
  mutate(across(c(year, month, borough), \(x) trimws(tolower(x)))) |>
  group_by(year, month, borough) |>
  summarize(ratcount = n(), .groups = "drop")
# Waste tonnage joined to rat-sighting counts on year/month/borough.
#
# Each row is one waste type for one borough-month, so `ratcount` is repeated
# on every waste-type row of the same borough-month. `combined_ym` is the
# year-month parsed by lubridate's ym(), used as a time axis for plotting.
merged <- waste_2 |>
  inner_join(sightings, by = c("year", "month", "borough")) |>
  # The join result keeps the grouping of `waste_2`; drop it so that `tons`
  # can be rewritten and later steps work on a plain, ungrouped tibble.
  ungroup() |>
  mutate(
    tons = as.numeric(tons),
    combined_ym = ym(paste(year, month, sep = "-"))
  )
# Total tonnage (all waste types added together) per year/month/borough,
# alongside that borough-month's rat count.
merged_tons <- merged |>
  # `ratcount` is part of the grouping only so that it is carried through
  # the summary.
  group_by(year, month, borough, ratcount) |>
  # Drop the grouping afterwards so later mutate() calls are not evaluated
  # group by group.
  summarize(total_tons = sum(tons), .groups = "drop") |>
  mutate(combined_ym = ym(paste(year, month, sep = "-")))
# Visualization
# Rat count by calendar month: one panel per year, one line per borough.
merged |>
  ggplot(
    aes(
      x = month,
      y = ratcount,
      color = borough,
      group = interaction(year, borough)
    )
  ) +
  geom_line() +
  facet_wrap(year ~ .) +
  labs(
    title = "Total Rat Count by Borough",
    x = "Month",
    y = "Rat Count",
    caption = "Data from NYC Open Data"
  )
# Rat count per borough along a single year-month time axis.
ggplot(merged) +
  geom_line(
    aes(
      x = combined_ym,
      y = ratcount,
      color = borough,
      group = interaction(year, borough)
    )
  ) +
  labs(
    title = "Total Rat Count by Borough",
    x = "Month",
    y = "Rat Count",
    caption = "Data from NYC Open Data"
  )
# Total collected tonnage per borough along the year-month time axis.
merged_tons |>
  ggplot(
    aes(
      x = combined_ym,
      y = total_tons,
      color = borough,
      group = interaction(year, borough)
    )
  ) +
  geom_line() +
  labs(
    title = "Total Tons of Trash by Borough",
    x = "Date",
    y = "Total Tons",
    caption = "Data from DSNY Monthly Tonnage Data"
  )
# Scatter of tonnage against rat count, coloured by waste type.
ggplot(
  data = merged,
  mapping = aes(x = ratcount, y = tons, color = type)
) +
  geom_point(size = 2, alpha = .6) +
  # Points outside the 0-85000 range are removed by the axis limits.
  scale_y_continuous(limits = c(0, 85000)) +
  labs(
    title = "Tons of Trash by Rat Count and Trash Type",
    x = "Rat Count",
    y = "Total Tons",
    color = "Type of Waste",
    caption = "Data from NYC Open Data and DSNY Monthly Tonnage Data"
  )
# Scatter of rat count against total tonnage, coloured by borough.
merged_tons |>
  ggplot(aes(x = total_tons, y = ratcount, color = borough)) +
  geom_point(size = 2, alpha = .6) +
  # Points outside the 0-1200 range are removed by the axis limits.
  lims(y = c(0, 1200)) +
  labs(
    title = "Rat Count by Total Tons of Trash",
    x = "Total Tons",
    y = "Rat Count",
    caption = "Data from NYC Open Data and DSNY Monthly Tonnage Data"
  )
# Scatter of rat count against total tonnage, one panel per borough.
ggplot(merged_tons, aes(x = total_tons, y = ratcount, color = borough)) +
  geom_point(size = 1, alpha = .6) +
  facet_wrap(borough ~ .) +
  # Points outside the 0-1200 range are removed by the axis limits.
  ylim(0, 1200) +
  labs(
    title = "Rat Count by Total Tons of Waste",
    x = "Total Tons",
    y = "Rat Count",
    # Colour is mapped to `borough`, so the legend is titled accordingly.
    color = "Borough",
    caption = "Data from NYC Open Data and DSNY Monthly Tonnage Data"
  )
# Interactive scatter: rat count on x, total tonnage on y, with a hover label
# showing both values.
merged_tons |>
  mutate(
    text_label = str_c(
      "Tons of Trash: ", total_tons,
      "\nNumber of Rats: ", ratcount
    )
  ) |>
  plot_ly(
    x = ~ratcount,
    y = ~total_tons,
    color = ~borough,
    text = ~text_label,
    type = "scatter",
    mode = "markers",
    alpha = 0.5
  )
# Interactive scatter with the axes swapped: total tonnage on x, rat count on
# y, with a hover label showing both values.
merged_tons |>
  mutate(
    text_label = str_c(
      "Number of Rats: ", ratcount,
      "\nTons of Trash: ", total_tons
    )
  ) |>
  plot_ly(
    x = ~total_tons,
    y = ~ratcount,
    color = ~borough,
    text = ~text_label,
    type = "scatter",
    mode = "markers",
    alpha = 0.5
  )